Load required libraries
library(STutility)
## Loading required package: Seurat
## Attaching SeuratObject
## Loading required package: ggplot2
## Registered S3 method overwritten by 'imager':
## method from
## plot.imlist
library(ggplot2)
library(ggpubr)
library(ggrastr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
library(ggbreak)
## ggbreak v0.0.9
##
## If you use ggbreak in published research, please cite the following
## paper:
##
## S Xu, M Chen, T Feng, L Zhan, L Zhou, G Yu. Use ggbreak to effectively
## utilize plotting space to deal with large datasets and outliers.
## Frontiers in Genetics. 2021, 12:774846. doi: 10.3389/fgene.2021.774846
Assemble spaceranger output files
TODO: make datasets available for download
# Mouse brain
samples1 <- Sys.glob(paths = "../../spaceranger_output/degraded_MB/*/filtered_feature_bc_matrix.h5")
imgs1 <- Sys.glob(paths = "../../spaceranger_output/degraded_MB/*/spatial/tissue_hires_image.png")
spotfiles1 <- Sys.glob(paths = "../../spaceranger_output/degraded_MB/*/spatial/tissue_positions_list.csv")
json1 <- Sys.glob(paths = "../../spaceranger_output/degraded_MB/*/spatial/scalefactors_json.json")
# Prostate cancer
samples2 <- Sys.glob(paths = "../../spaceranger_output/prostatecancer/*/filtered_feature_bc_matrix.h5")
imgs2 <- Sys.glob(paths = "../../spaceranger_output/prostatecancer/*/spatial/tissue_hires_image.png")
spotfiles2 <- Sys.glob(paths = "../../spaceranger_output/prostatecancer/*/spatial/tissue_positions_list.csv")
json2 <- Sys.glob(paths = "../../spaceranger_output/prostatecancer/*/spatial/scalefactors_json.json")
infoTable <- data.frame(samples = c(samples1, samples2), imgs = c(imgs1, imgs2), spotfiles = c(spotfiles1, spotfiles2), json = c(json1, json2))
infoTable <- cbind(infoTable, arrayID = do.call(rbind, strsplit(infoTable$samples, "/"))[, 5])
curated_metadata <- openxlsx::read.xlsx("../../sheets/curated_RNA_rescue_sample_metadata.xlsx", sheet = 3)
curated_metadata <- setNames(curated_metadata, nm = c("storage_time", "seq_date", "include", "RNA_rescue",
"project", "experimenter", "processer", "comments",
"ID", "paper_id", "RIN", "DV200", "protocol", "source", "arrayID", "spots_under_tissue",
"genes_detected", "fraction_spots_under_tissue",
"median_genes_per_spot", "median_UMIs_per_spot",
"saturation", "reads_mapped_to_probe_set",
"reads_mapped_confidently_to_probe_set",
"reads_mapped_confidently_to_filtered_probe_set",
"reads_mapped_to_genome",
"reads_mapped_confidently_to_genome",
"number_of_panel_genes"))
infoTable <- merge(infoTable, curated_metadata, by = "arrayID")
# Exclude FFPE data
infoTable <- subset(infoTable, !protocol %in% "FFPE")
Load mouse brain Visium data (4xRRST + 4xstandard)
# Subset infoTable to include mosuebrain data
MB_infoTable <- subset(infoTable, source == "mouse brain")
MB <- InputFromTable(infotable = MB_infoTable)
## Using spotfiles to remove spots outside of tissue
## Loading ../../spaceranger_output/degraded_MB/V10S29-134_A1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V10S29-134_B1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V10S29-134_C1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V10S29-134_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V11D08-304_A1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V11D08-304_B1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V11D08-304_C1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/degraded_MB/V11D08-304_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
##
## ------------- Filtering (not including images based filtering) --------------
## Spots removed: 0
## Genes removed: 9023
## Saving capture area ranges to Staffli object
## After filtering the dimensions of the experiment is: [23262 genes, 34597 spots]
Load H&E images into the MB object
MB <- LoadImages(MB, time.resolve = FALSE)
## Loading images for 8 samples:
## Reading ../../spaceranger_output/degraded_MB/V10S29-134_A1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 1 image from 1979x2000 pixels to 400x404 pixels
## Reading ../../spaceranger_output/degraded_MB/V10S29-134_B1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 2 image from 1979x2000 pixels to 400x404 pixels
## Reading ../../spaceranger_output/degraded_MB/V10S29-134_C1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 3 image from 1979x2000 pixels to 400x404 pixels
## Reading ../../spaceranger_output/degraded_MB/V10S29-134_D1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 4 image from 1979x2000 pixels to 400x404 pixels
## Reading ../../spaceranger_output/degraded_MB/V11D08-304_A1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 5 image from 2000x1937 pixels to 400x387 pixels
## Reading ../../spaceranger_output/degraded_MB/V11D08-304_B1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 6 image from 2000x1874 pixels to 400x375 pixels
## Reading ../../spaceranger_output/degraded_MB/V11D08-304_C1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 7 image from 2000x1937 pixels to 400x387 pixels
## Reading ../../spaceranger_output/degraded_MB/V11D08-304_D1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 8 image from 2000x1956 pixels to 400x391 pixels
ImagePlot(MB, ncols = 4)
Here we apply some rotations to make a rough alignment of the H&E images.
# Warp transform
MB <- WarpImages(MB, verbose = TRUE, transforms = list("1" = list(angle = 90), "2" = list(angle = -90),
"3" = list(angle = 90), "4" = list(angle = 90),
"5" = list(angle = -90), "6" = list(angle = -90),
"7" = list(angle = 90), "8" = list(angle = -90)))
## Creating dummy masks ...Loading masked image for sample 1 ...
## Warping pixel coordinates for 1 ...
## Warping image for 1 ...
## Warping image mask for 1 ...
## Finished alignment for sample1
##
## Loading masked image for sample 2 ...
## Warping pixel coordinates for 2 ...
## Warping image for 2 ...
## Warping image mask for 2 ...
## Finished alignment for sample2
##
## Loading masked image for sample 3 ...
## Warping pixel coordinates for 3 ...
## Warping image for 3 ...
## Warping image mask for 3 ...
## Finished alignment for sample3
##
## Loading masked image for sample 4 ...
## Warping pixel coordinates for 4 ...
## Warping image for 4 ...
## Warping image mask for 4 ...
## Finished alignment for sample4
##
## Loading masked image for sample 5 ...
## Warping pixel coordinates for 5 ...
## Warping image for 5 ...
## Warping image mask for 5 ...
## Finished alignment for sample5
##
## Loading masked image for sample 6 ...
## Warping pixel coordinates for 6 ...
## Warping image for 6 ...
## Warping image mask for 6 ...
## Finished alignment for sample6
##
## Loading masked image for sample 7 ...
## Warping pixel coordinates for 7 ...
## Warping image for 7 ...
## Warping image mask for 7 ...
## Finished alignment for sample7
##
## Loading masked image for sample 8 ...
## Warping pixel coordinates for 8 ...
## Warping image for 8 ...
## Warping image mask for 8 ...
## Finished alignment for sample8
MB$protocol <- MB[[]] %>%
mutate(protocol = ifelse(protocol == "RNA rescue", "RRST", protocol)) %>%
pull(protocol)
p <- ST.FeaturePlot(MB, features = "nFeature_RNA", ncol = 2, indices = c(1, 5), show.sb = FALSE,
label.by = "protocol", pt.size = 1.2) &
theme(plot.title = element_blank(), legend.position = "top",
legend.text = element_text(angle = 60, hjust = 1, size = 14, colour = "black"),
legend.title = element_text(size = 18, face = "bold", colour = "black", vjust = 1),
strip.text = element_text(size = 18, face = "bold", colour = "black")) &
labs(fill = "unique genes")
## Loading required namespace: viridis
p
jpeg(filename = "plots/mousebrain_unique_genes_spatial.jpg", width = 2200, height = 1400, res = 300)
print(p)
dev.off()
p <- ggplot(MB[[]], aes(arrayID, nFeature_RNA, fill = protocol)) +
geom_violin(scale = "width") +
geom_boxplot(width = 0.3) +
geom_hline(data = MB[[]] %>% dplyr::group_by(protocol) %>% summarize(Median = median(nFeature_RNA)),
aes("", yintercept = Median, group = protocol), linetype = "longdash") +
geom_label(data = MB[[]] %>% dplyr::group_by(protocol) %>% summarize(Median = median(nFeature_RNA)), size = 5,
aes("", y = Median, group = protocol, label = Median)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 14, colour = "black"),
axis.text.y = element_text(size = 14, colour = "black"),
strip.text = element_text(size = 18, face = "bold", colour = "black")) +
labs(y = "", x = "") +
facet_grid(~protocol, scales = "free") +
guides(fill = "none") +
scale_x_discrete(labels = c("", "Rep1", "Rep2", "Rep3", "Rep4"))
## Warning: Ignoring unknown aesthetics: x
p
# Export plot
jpeg(filename = "plots/mousebrain_unique_genes_violin.jpg", width = 2000, height = 900, res = 300)
print(p)
dev.off()
Calculate gene attributes for mouse brain data
gene_attr <- do.call(cbind, lapply(c("RRST", "standard"), function(pro) {
umis <- GetAssayData(MB, slot = "counts", assay = "RNA")[, MB$protocol %in% pro]
gene_attr_protocol <- data.frame(rowSums(umis), rowMeans(umis > 0), row.names = rownames(umis))
gene_attr_protocol <- setNames(gene_attr_protocol, nm = c(paste0(gsub(pattern = " ", replacement = "_", x = pro), "_count"),
paste0(gsub(pattern = " ", replacement = "_", x = pro), "_det_rate")))
return(gene_attr_protocol)
}))
gene_attr <- gene_attr %>%
mutate("RRST_log_count" = log1p(`RRST_count`), "standard_log_count" = log1p(`standard_count`))
Subset genes to include those available in the Visium FFPE probe set.
mouse_probes <- read.csv("sheets/Visium_Mouse_Transcriptome_Probe_Set_v1.0_mm10-2020-A.csv", skip = 5, header = TRUE)
mouse_probes$gene_name <- do.call(rbind, strsplit(mouse_probes$probe_id, "\\|"))[, 2]
p1 <- ggplot(gene_attr %>% mutate(gene = rownames(.)) %>%
subset(gene %in% mouse_probes$gene_name),
aes_string("standard_log_count", "RRST_log_count")) +
geom_point(size = 0.3) +
ggpubr::stat_cor(digits = 2) +
scale_x_continuous(limits = c(0, max(max(gene_attr$`RRST_log_count`), max(gene_attr$standard_log_count)))) +
scale_y_continuous(limits = c(0, max(max(gene_attr$`RRST_log_count`), max(gene_attr$standard_log_count)))) +
geom_abline(slope = 1, intercept = 0, color = "red", linetype = "longdash") +
labs(x = "standard protocol", y = "RRST", title = "log1p(UMI counts)") +
theme_minimal() +
theme(axis.text = element_text(size = 11, colour = "black"), axis.title = element_text(size = 14, colour = "black"),
plot.title = element_text(size = 14, face = "bold", colour = "black"))
p2 <- ggplot(gene_attr, aes_string("standard_det_rate", "RRST_det_rate")) +
geom_point(size = 0.3) +
geom_abline(slope = 1, intercept = 0, color = "red", linetype = "longdash") +
labs(x = "standard protocol", y = "RRST", title = "detection rate") +
theme_minimal() +
theme(axis.text = element_text(size = 11, colour = "black"), axis.title = element_text(size = 14, colour = "black"),
plot.title = element_text(size = 14, face = "bold", colour = "black"))
p1 + p2
# Export plots
jpeg(filename = "plots/mousebrain_gene_scatter_counts.jpg", width = 1000, height = 1000, res = 300)
print(p1)
dev.off()
jpeg(filename = "plots/mousebrain_gene_scatter_det_rater.jpg", width = 1000, height = 1000, res = 300)
print(p2)
dev.off()
gene_annotations <- read.table(file = "sheets/mgenes.tsv", header = TRUE)
rownames(gene_annotations) <- gene_annotations$gene_name
# rename gene type for mitochondrial and ribosomal protein coding genes
rp.genes <- grep(pattern = "^Rpl|^Rps", x = rownames(gene_attr), value = TRUE)
mt.genes <- grep(pattern = "^mt-", x = rownames(gene_attr), value = TRUE)
gene_annotations$gene_type <- ifelse(gene_annotations$gene_name %in% rp.genes, "ribosomal\nprotein_coding", gene_annotations$gene_type)
gene_annotations$gene_type <- ifelse(gene_annotations$gene_name %in% mt.genes, "mitochondrial", gene_annotations$gene_type)
gene_attr$gene_type <- gene_annotations[rownames(gene_attr), "gene_type"]
gene_attr <- gene_attr %>%
mutate(observed_in = case_when(RRST_count > 0 & standard_count > 0 ~ "both",
RRST_count > 0 & standard_count == 0 ~ "RRST",
RRST_count == 0 & standard_count > 0 ~ "standard")) %>%
mutate(gene = rownames(.))
lvls <- gene_attr %>%
group_by(gene_type) %>%
summarize(tot = sum(RRST_count + standard_count)) %>%
arrange(-tot) %>%
pull(gene_type)
gene_attr_summarized <- gene_attr %>%
reshape2::melt(measure.vars = c("RRST_count", "standard_count")) %>%
filter(value > 0) %>%
group_by(gene_type, variable) %>%
summarize(totTranscript = n(), totCount = sum(value)) %>%
mutate(gene_type = factor(gene_type, levels = lvls),
variable = ifelse(variable == "RRST_count", "RRST", "standard"))
## `summarise()` has grouped output by 'gene_type'. You can override using the
## `.groups` argument.
gene_attr_prop <- gene_attr %>%
group_by(gene_type, observed_in) %>%
summarize(Freq = n()) %>%
group_by(gene_type) %>%
mutate(prop = Freq/sum(Freq)) %>%
mutate(gene_type = factor(gene_type, levels = lvls))
## `summarise()` has grouped output by 'gene_type'. You can override using the
## `.groups` argument.
p1 <- ggplot(gene_attr_summarized,
aes(gene_type, totTranscript, fill = variable)) +
geom_bar(stat = "identity", position = position_dodge(preserve = 'single')) +
scale_y_cut(breaks = 100, which = c(1, 2), scales = c(1, 2), space = 0.5) +
theme_minimal() +
theme(axis.text.x = element_blank(), axis.title.x = element_blank(),
axis.text.y = element_text(size = 12, colour = "black"),
axis.title = element_text(size = 14, colour = "black"),
legend.text = element_text(size = 14, colour = "black"),
legend.title = element_text(size = 14, colour = "black")) +
labs(y = "transcripts detected", fill = "")
p2 <- ggplot(gene_attr_prop, aes("", prop, fill = observed_in)) +
geom_bar(stat = "identity", color = "black") +
coord_polar("y", start = 0) +
scale_fill_brewer(palette = "Spectral") +
facet_wrap(~gene_type, strip.position = "bottom", ncol = 10) +
theme_void() +
theme(strip.text = element_text(angle = 90, vjust = 0.5, hjust = 1, margin = margin(0.2, 0, 0.2, 0, "cm"), colour = "black", size = 12),
legend.text = element_text(size = 14, colour = "black"),
legend.title = element_text(size = 14, colour = "black"))
p3 <- ggplot(gene_attr_summarized,
aes(gene_type, totCount, fill = variable)) +
geom_bar(stat = "identity", position = position_dodge(preserve = 'single')) +
scale_y_cut(breaks = 1e5, which = c(1, 2), scales = c(1, 2), space = 0.5) +
theme_minimal() +
theme(axis.text.x = element_blank(), axis.title.x = element_blank(),
axis.text.y = element_text(size = 12, colour = "black"),
axis.title = element_text(size = 14, colour = "black"),
legend.text = element_text(size = 14, colour = "black"),
legend.title = element_text(size = 14, colour = "black")) +
labs(y = "UMI count", fill = "")
p1
p2
p3
jpeg("../Suppl_figures/Suppl_Figure_mousebrain_and_prostatecancer/mousebrain_biotype_content1.jpg", width = 2500, height = 500, res = 200, bg = NA)
print(p1)
dev.off()
jpeg("../Suppl_figures/Suppl_Figure_mousebrain_and_prostatecancer/mousebrain_biotype_content2.jpg", width = 2500, height = 500, res = 200, bg = NA)
print(p2)
dev.off()
jpeg("../Suppl_figures/Suppl_Figure_mousebrain_and_prostatecancer/mousebrain_biotype_content3.jpg", width = 2500, height = 500, res = 200, bg = NA)
print(p3)
dev.off()
Load prostate cancer data (2xRRST and 2xstandard)
PC_infoTable <- subset(infoTable, source == "prostate cancer")
PC <- InputFromTable(infotable = PC_infoTable[c(3:4, 1:2), ])
## Using spotfiles to remove spots outside of tissue
## Loading ../../spaceranger_output/prostatecancer/V11M22-349_C1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/prostatecancer/V11M22-349_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/prostatecancer/V11A20-405_C1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/prostatecancer/V11A20-405_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
##
## ------------- Filtering (not including images based filtering) --------------
## Spots removed: 0
## Genes removed: 11352
## Saving capture area ranges to Staffli object
## After filtering the dimensions of the experiment is: [25249 genes, 15684 spots]
Prostate cancer H&E images
PC <- LoadImages(PC, time.resolve = FALSE)
## Loading images for 4 samples:
## Reading ../../spaceranger_output/prostatecancer/V11M22-349_C1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 1 image from 2000x1937 pixels to 400x387 pixels
## Reading ../../spaceranger_output/prostatecancer/V11M22-349_D1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 2 image from 2000x1937 pixels to 400x387 pixels
## Reading ../../spaceranger_output/prostatecancer/V11A20-405_C1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 3 image from 1979x2000 pixels to 400x404 pixels
## Reading ../../spaceranger_output/prostatecancer/V11A20-405_D1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 4 image from 1979x2000 pixels to 400x404 pixels
ImagePlot(PC, ncols = 4)
Apply rigid transformations.
# Warp transform
PC <- WarpImages(PC, verbose = TRUE, transforms = list("1" = list(shift.y = -130), "3" = list(angle = 180)))
## Creating dummy masks ...Loading masked image for sample 1 ...
## Warping pixel coordinates for 1 ...
## Warping image for 1 ...
## Warping image mask for 1 ...
## Finished alignment for sample1
##
## Loading masked image for sample 3 ...
## Warping pixel coordinates for 3 ...
## Warping image for 3 ...
## Warping image mask for 3 ...
## Finished alignment for sample3
PC$protocol <- PC[[]] %>%
mutate(protocol = ifelse(protocol == "RNA rescue", "RRST", protocol)) %>%
pull(protocol)
p <- ST.FeaturePlot(PC, features = "nFeature_RNA", ncol = 2, indices = c(3, 2),
label.by = "protocol", pt.size = 1.2, show.sb = FALSE) &
theme(plot.title = element_blank(), legend.position = "top",
legend.text = element_text(angle = 60, hjust = 1, size = 14, colour = "black"),
legend.title = element_text(size = 18, face = "bold", colour = "black", vjust = 1),
strip.text = element_text(size = 18, face = "bold", colour = "black")) &
labs(fill = "unique genes")
p
# Export plot
jpeg(filename = "plots/prostatecancer_unique_genes_spatial.jpg", width = 2200, height = 1400, res = 300)
print(p)
dev.off()
p <- ggplot(PC[[]], aes(arrayID, nFeature_RNA, fill = protocol)) +
geom_violin(scale = "width") +
geom_boxplot(width = 0.3) +
geom_hline(data = PC[[]] %>% dplyr::group_by(protocol) %>% summarize(Median = median(nFeature_RNA)),
aes("", yintercept = Median, group = protocol), linetype = "longdash") +
geom_label(data = PC[[]] %>% dplyr::group_by(protocol) %>% summarize(Median = median(nFeature_RNA)),
aes("", y = Median, group = protocol, label = Median)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 14, colour = "black"),
axis.text.y = element_text(size = 14, colour = "black"),
strip.text = element_text(size = 18, face = "bold", colour = "black")) +
labs(y = "", x = "") +
facet_grid(~protocol, scales = "free") +
guides(fill = "none") +
scale_x_discrete(labels = c("", "Rep1", "Rep2"))
## Warning: Ignoring unknown aesthetics: x
p
jpeg(filename = "plots/prostatecancer_unique_genes_violin.jpg", width = 2000, height = 900, res = 300)
print(p)
dev.off()
Compute gene attributes
gene_attr <- do.call(cbind, lapply(c("RRST", "standard"), function(pro) {
umis <- GetAssayData(PC, slot = "counts", assay = "RNA")[, PC$protocol %in% pro]
gene_attr_protocol <- data.frame(rowSums(umis), rowMeans(umis > 0), row.names = rownames(umis))
gene_attr_protocol <- setNames(gene_attr_protocol, nm = c(paste0(gsub(pattern = " ", replacement = "_", x = pro), "_count"),
paste0(gsub(pattern = " ", replacement = "_", x = pro), "_det_rate")))
return(gene_attr_protocol)
}))
gene_attr <- gene_attr %>%
mutate("RRST_log_count" = log1p(`RRST_count`), "standard_log_count" = log1p(`standard_count`))
Subset genes to include those available in the Visium FFPE probe set.
human_probes <- read.csv("../../sheets/Visium_Human_Transcriptome_Probe_Set_v1.0_GRCh38-2020-A.csv", skip = 5, header = TRUE)
human_probes$gene_name <- do.call(rbind, strsplit(human_probes$probe_id, "\\|"))[, 2]
p1 <- ggplot(gene_attr %>% mutate(gene = rownames(.)) %>% subset(gene %in% human_probes$gene_name), aes_string("standard_log_count", "RRST_log_count")) +
geom_point(size = 0.3) +
ggpubr::stat_cor() +
scale_x_continuous(limits = c(0, max(max(gene_attr$`RRST_log_count`), max(gene_attr$standard_log_count)))) +
scale_y_continuous(limits = c(0, max(max(gene_attr$`RRST_log_count`), max(gene_attr$standard_log_count)))) +
geom_abline(slope = 1, intercept = 0, color = "red", linetype = "longdash") +
labs(x = "standard protocol", y = "RRST", title = "log1p(UMI counts)") +
theme_minimal() +
theme(axis.text = element_text(size = 11, colour = "black"), axis.title = element_text(size = 14, colour = "black"),
plot.title = element_text(size = 14, face = "bold", colour = "black"))
p2 <- ggplot(gene_attr, aes_string("standard_det_rate", "RRST_det_rate")) +
geom_point(size = 0.3) +
geom_abline(slope = 1, intercept = 0, color = "red", linetype = "longdash") +
labs(x = "standard protocol", y = "RRST", title = "detection rate") +
theme_minimal() +
theme(axis.text = element_text(size = 11, colour = "black"), axis.title = element_text(size = 14, colour = "black"),
plot.title = element_text(size = 14, face = "bold", colour = "black"))
p1 + p2
jpeg(filename = "plots/prostatecancer_gene_scatter_counts.jpg", width = 1000, height = 1000, res = 300)
print(p1)
dev.off()
jpeg(filename = "plots/prostatecancer_gene_scatter_det_rate.jpg", width = 1000, height = 1000, res = 300)
print(p2)
dev.off()
gene_annotations <- read.table(file = "../../genes/hgenes.tsv", header = TRUE)
rownames(gene_annotations) <- gene_annotations$gene_name
# rename gene type for mitochondrial and ribosomal protein coding genes
rp.genes <- grep(pattern = "^RPL|^RPS", x = rownames(gene_attr), value = TRUE)
mt.genes <- grep(pattern = "^MT-", x = rownames(gene_attr), value = TRUE)
gene_annotations$gene_type <- ifelse(gene_annotations$gene_name %in% rp.genes, "ribosomal\nprotein_coding", gene_annotations$gene_type)
gene_annotations$gene_type <- ifelse(gene_annotations$gene_name %in% mt.genes, "mitochondrial", gene_annotations$gene_type)
gene_attr$gene_type <- gene_annotations[rownames(gene_attr), "gene_type"]
gene_attr <- gene_attr %>%
mutate(observed_in = case_when(RRST_count > 0 & standard_count > 0 ~ "both",
RRST_count > 0 & standard_count == 0 ~ "RNA_rescue",
RRST_count == 0 & standard_count > 0 ~ "standard")) %>%
mutate(gene = rownames(.))
gene_attr <- subset(gene_attr, !gene_type %in% c("IG_C_pseudogene", "TR_V_pseudogene"))
lvls <- gene_attr %>%
group_by(gene_type) %>%
summarize(tot = sum(RRST_count + standard_count)) %>%
arrange(-tot) %>%
pull(gene_type)
gene_attr_summarized <- gene_attr %>%
reshape2::melt(measure.vars = c("RRST_count", "standard_count")) %>%
filter(value > 0) %>%
group_by(gene_type, variable) %>%
summarize(totTranscript = n(), totCount = sum(value)) %>%
mutate(gene_type = factor(gene_type, levels = lvls),
variable = ifelse(variable == "RRST_count", "RRST", "standard"))
## `summarise()` has grouped output by 'gene_type'. You can override using the
## `.groups` argument.
gene_attr_prop <- gene_attr %>%
group_by(gene_type, observed_in) %>%
summarize(Freq = n()) %>%
group_by(gene_type) %>%
mutate(prop = Freq/sum(Freq)) %>%
mutate(gene_type = factor(gene_type, levels = lvls))
## `summarise()` has grouped output by 'gene_type'. You can override using the
## `.groups` argument.
p1 <- ggplot(gene_attr_summarized,
aes(gene_type, totTranscript, fill = variable)) +
geom_bar(stat = "identity", position = position_dodge(preserve = 'single')) +
scale_y_cut(breaks = 100, which = c(1, 2), scales = c(1, 2), space = 0.5) +
theme_minimal() +
theme(axis.text.x = element_blank(), axis.title.x = element_blank(),
axis.text.y = element_text(size = 12, colour = "black"),
axis.title = element_text(size = 14, colour = "black"),
legend.text = element_text(size = 14, colour = "black"),
legend.title = element_text(size = 14, colour = "black")) +
labs(y = "transcripts detected", fill = "")
p2 <- ggplot(gene_attr_prop, aes("", prop, fill = observed_in)) +
geom_bar(stat = "identity", color = "black") +
coord_polar("y", start = 0) +
scale_fill_brewer(palette = "Spectral") +
facet_wrap(~gene_type, strip.position = "bottom", ncol = 10) +
theme_void() +
theme(strip.text = element_text(angle = 90, vjust = 0.5, hjust = 1, margin = margin(0.2, 0, 0.2, 0, "cm"), colour = "black", size = 12),
legend.text = element_text(size = 14, colour = "black"),
legend.title = element_text(size = 14, colour = "black"))
p3 <- ggplot(gene_attr_summarized,
aes(gene_type, totCount, fill = variable)) +
geom_bar(stat = "identity", position = position_dodge(preserve = 'single')) +
scale_y_cut(breaks = 1e5, which = c(1, 2), scales = c(1, 2), space = 0.5) +
theme_minimal() +
theme(axis.text.x = element_blank(), axis.title.x = element_blank(),
axis.text.y = element_text(size = 12, colour = "black"),
axis.title = element_text(size = 14, colour = "black"),
legend.text = element_text(size = 14, colour = "black"),
legend.title = element_text(size = 14, colour = "black")) +
labs(y = "UMI count", fill = "")
p1
p2
p3
jpeg("../Suppl_figures/Suppl_Figure_mousebrain_and_prostatecancer/prostatecancer_biotype_content1.jpg", width = 2500, height = 500, res = 200, bg = NA)
print(p1)
dev.off()
jpeg("../Suppl_figures/Suppl_Figure_mousebrain_and_prostatecancer//prostatecancer_biotype_content2.jpg", width = 2500, height = 500, res = 200, bg = NA)
print(p2)
dev.off()
jpeg("../Suppl_figures/Suppl_Figure_mousebrain_and_prostatecancer//prostatecancer_biotype_content3.jpg", width = 2500, height = 500, res = 200, bg = NA)
print(p3)
dev.off()
date()
## [1] "Thu Aug 25 09:43:29 2022"
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
##
## Matrix products: default
## BLAS/LAPACK: /Users/ludviglarsson/anaconda3/envs/R4.1/lib/libopenblasp-r0.3.20.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggbreak_0.0.9 magrittr_2.0.3 dplyr_1.0.8 ggrastr_1.0.1
## [5] ggpubr_0.4.0 STutility_0.1.0 ggplot2_3.3.5 SeuratObject_4.0.4
## [9] Seurat_4.1.0
##
## loaded via a namespace (and not attached):
## [1] backports_1.4.1 plyr_1.8.7 igraph_1.3.0
## [4] lazyeval_0.2.2 sp_1.4-6 splines_4.1.3
## [7] listenv_0.8.0 scattermore_0.8 digest_0.6.29
## [10] yulab.utils_0.0.4 htmltools_0.5.2 viridis_0.6.2
## [13] magick_2.7.3 tiff_0.1-11 fansi_1.0.3
## [16] tensor_1.5 cluster_2.1.3 ROCR_1.0-11
## [19] openxlsx_4.2.5 globals_0.14.0 matrixStats_0.62.0
## [22] spatstat.sparse_2.1-1 jpeg_0.1-9 colorspace_2.0-3
## [25] ggrepel_0.9.1 xfun_0.30 crayon_1.5.1
## [28] jsonlite_1.8.0 spatstat.data_2.2-0 zeallot_0.1.0
## [31] survival_3.3-1 zoo_1.8-10 glue_1.6.2
## [34] polyclip_1.10-0 gtable_0.3.0 leiden_0.3.9
## [37] car_3.0-13 future.apply_1.8.1 abind_1.4-5
## [40] scales_1.2.0 DBI_1.1.2 rstatix_0.7.0
## [43] spatstat.random_2.2-0 miniUI_0.1.1.1 Rcpp_1.0.8.3
## [46] viridisLite_0.4.0 xtable_1.8-4 gridGraphics_0.5-1
## [49] reticulate_1.24 spatstat.core_2.4-2 bit_4.0.4
## [52] htmlwidgets_1.5.4 httr_1.4.2 RColorBrewer_1.1-3
## [55] ellipsis_0.3.2 ica_1.0-2 farver_2.1.0
## [58] pkgconfig_2.0.3 sass_0.4.1 uwot_0.1.11
## [61] deldir_1.0-6 utf8_1.2.2 labeling_0.4.2
## [64] ggplotify_0.1.0 tidyselect_1.1.2 rlang_1.0.2
## [67] reshape2_1.4.4 later_1.3.0 munsell_0.5.0
## [70] tools_4.1.3 cli_3.2.0 generics_0.1.2
## [73] broom_0.8.0 ggridges_0.5.3 evaluate_0.15
## [76] stringr_1.4.0 fastmap_1.1.0 yaml_2.3.5
## [79] goftest_1.2-3 bit64_4.0.5 knitr_1.38
## [82] fitdistrplus_1.1-8 zip_2.2.0 purrr_0.3.4
## [85] RANN_2.6.1 readbitmap_0.1.5 pbapply_1.5-0
## [88] future_1.24.0 nlme_3.1-157 mime_0.12
## [91] aplot_0.1.4 hdf5r_1.3.5 compiler_4.1.3
## [94] rstudioapi_0.13 beeswarm_0.4.0 plotly_4.10.0
## [97] png_0.1-7 ggsignif_0.6.3 spatstat.utils_2.3-0
## [100] tibble_3.1.6 bslib_0.3.1 stringi_1.7.6
## [103] highr_0.9 lattice_0.20-45 Matrix_1.4-1
## [106] shinyjs_2.1.0 vctrs_0.4.1 pillar_1.7.0
## [109] lifecycle_1.0.1 spatstat.geom_2.4-0 lmtest_0.9-40
## [112] jquerylib_0.1.4 RcppAnnoy_0.0.19 data.table_1.14.2
## [115] cowplot_1.1.1 irlba_2.3.5 raster_3.5-15
## [118] httpuv_1.6.5 patchwork_1.1.1 R6_2.5.1
## [121] imager_0.42.13 promises_1.2.0.1 KernSmooth_2.23-20
## [124] gridExtra_2.3 bmp_0.3 vipor_0.4.5
## [127] parallelly_1.31.0 codetools_0.2-18 MASS_7.3-56
## [130] assertthat_0.2.1 withr_2.5.0 sctransform_0.3.3
## [133] mgcv_1.8-40 parallel_4.1.3 terra_1.5-21
## [136] ggfun_0.0.6 grid_4.1.3 rpart_4.1.16
## [139] tidyr_1.2.0 rmarkdown_2.13 carData_3.0-5
## [142] Rtsne_0.16 shiny_1.7.1 ggbeeswarm_0.6.0